1) Data collection and loading¶

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Reading eeg data from csv file to a dataframe called "eeg_data"
eeg_data = pd.read_csv("EEG_data.csv")
eeg_data.head()
Out[2]:
SubjectID VideoID Attention Mediation Raw Delta Theta Alpha1 Alpha2 Beta1 Beta2 Gamma1 Gamma2 predefinedlabel user-definedlabeln
0 0.0 0.0 56.0 43.0 278.0 301963.0 90612.0 33735.0 23991.0 27946.0 45097.0 33228.0 8293.0 0.0 0.0
1 0.0 0.0 40.0 35.0 -50.0 73787.0 28083.0 1439.0 2240.0 2746.0 3687.0 5293.0 2740.0 0.0 0.0
2 0.0 0.0 47.0 48.0 101.0 758353.0 383745.0 201999.0 62107.0 36293.0 130536.0 57243.0 25354.0 0.0 0.0
3 0.0 0.0 47.0 57.0 -5.0 2012240.0 129350.0 61236.0 17084.0 11488.0 62462.0 49960.0 33932.0 0.0 0.0
4 0.0 0.0 44.0 53.0 -8.0 1005145.0 354328.0 37102.0 88881.0 45307.0 99603.0 44790.0 29749.0 0.0 0.0
In [3]:
# demographic_info.csv dataset is loaded into dataframe called "demogrphic_data"
demographic_data = pd.read_csv("demographic_info.csv")
demographic_data.head()
Out[3]:
subject ID age ethnicity gender
0 0 25 Han Chinese M
1 1 24 Han Chinese M
2 2 31 English M
3 3 28 Han Chinese F
4 4 24 Bengali M

2) Data exploration¶

In [4]:
eeg_data.shape # eeg_data contains (12811 rows, 15 columns)
Out[4]:
(12811, 15)
In [5]:
eeg_data.columns
Out[5]:
Index(['SubjectID', 'VideoID', 'Attention', 'Mediation', 'Raw', 'Delta',
       'Theta', 'Alpha1', 'Alpha2', 'Beta1', 'Beta2', 'Gamma1', 'Gamma2',
       'predefinedlabel', 'user-definedlabeln'],
      dtype='object')
In [6]:
demographic_data.columns
Out[6]:
Index(['subject ID', ' age', ' ethnicity', ' gender'], dtype='object')
In [7]:
demographic_data.shape # demographic data contains (10 rows and 4 columns)
Out[7]:
(10, 4)
In [8]:
# Renaming the column "subject ID" to "Subject ID"
demographic_data.rename(columns={"subject ID": "SubjectID"},inplace=True)
In [9]:
demographic_data
Out[9]:
SubjectID age ethnicity gender
0 0 25 Han Chinese M
1 1 24 Han Chinese M
2 2 31 English M
3 3 28 Han Chinese F
4 4 24 Bengali M
5 5 24 Han Chinese M
6 6 24 Han Chinese M
7 7 25 Han Chinese M
8 8 25 Han Chinese M
9 9 24 Han Chinese F

Merging eeg_data and demographic_info data using inner join on Column "SubjectID"¶

In [10]:
merged_data_before = pd.merge(eeg_data,demographic_data,how="inner",on="SubjectID")
merged_data_before.head()
Out[10]:
SubjectID VideoID Attention Mediation Raw Delta Theta Alpha1 Alpha2 Beta1 Beta2 Gamma1 Gamma2 predefinedlabel user-definedlabeln age ethnicity gender
0 0.0 0.0 56.0 43.0 278.0 301963.0 90612.0 33735.0 23991.0 27946.0 45097.0 33228.0 8293.0 0.0 0.0 25 Han Chinese M
1 0.0 0.0 40.0 35.0 -50.0 73787.0 28083.0 1439.0 2240.0 2746.0 3687.0 5293.0 2740.0 0.0 0.0 25 Han Chinese M
2 0.0 0.0 47.0 48.0 101.0 758353.0 383745.0 201999.0 62107.0 36293.0 130536.0 57243.0 25354.0 0.0 0.0 25 Han Chinese M
3 0.0 0.0 47.0 57.0 -5.0 2012240.0 129350.0 61236.0 17084.0 11488.0 62462.0 49960.0 33932.0 0.0 0.0 25 Han Chinese M
4 0.0 0.0 44.0 53.0 -8.0 1005145.0 354328.0 37102.0 88881.0 45307.0 99603.0 44790.0 29749.0 0.0 0.0 25 Han Chinese M
In [11]:
# Renaming columns
merged_data_before.rename(columns={" gender":"Gender"," ethnicity": "Ethnicity"," age":"Age"},inplace=True)
In [12]:
merged_data_before["Ethnicity"]
Out[12]:
0        Han Chinese
1        Han Chinese
2        Han Chinese
3        Han Chinese
4        Han Chinese
            ...     
12806    Han Chinese
12807    Han Chinese
12808    Han Chinese
12809    Han Chinese
12810    Han Chinese
Name: Ethnicity, Length: 12811, dtype: object
In [13]:
merged_data_before["Gender"]
Out[13]:
0        M
1        M
2        M
3        M
4        M
        ..
12806    F
12807    F
12808    F
12809    F
12810    F
Name: Gender, Length: 12811, dtype: object
In [14]:
merged_data_before.head()
Out[14]:
SubjectID VideoID Attention Mediation Raw Delta Theta Alpha1 Alpha2 Beta1 Beta2 Gamma1 Gamma2 predefinedlabel user-definedlabeln Age Ethnicity Gender
0 0.0 0.0 56.0 43.0 278.0 301963.0 90612.0 33735.0 23991.0 27946.0 45097.0 33228.0 8293.0 0.0 0.0 25 Han Chinese M
1 0.0 0.0 40.0 35.0 -50.0 73787.0 28083.0 1439.0 2240.0 2746.0 3687.0 5293.0 2740.0 0.0 0.0 25 Han Chinese M
2 0.0 0.0 47.0 48.0 101.0 758353.0 383745.0 201999.0 62107.0 36293.0 130536.0 57243.0 25354.0 0.0 0.0 25 Han Chinese M
3 0.0 0.0 47.0 57.0 -5.0 2012240.0 129350.0 61236.0 17084.0 11488.0 62462.0 49960.0 33932.0 0.0 0.0 25 Han Chinese M
4 0.0 0.0 44.0 53.0 -8.0 1005145.0 354328.0 37102.0 88881.0 45307.0 99603.0 44790.0 29749.0 0.0 0.0 25 Han Chinese M
In [15]:
# Total rows, columns
merged_data_before.shape
Out[15]:
(12811, 18)
In [16]:
merged_data_before_save = merged_data_before.to_csv("./data generated/merged_data_before_dummies.csv",index=False)

3) Feature transformation¶

Search for NaN values in all columns, if present Impute them => Imputation¶

In [20]:
merged_data.isna().sum()
Out[20]:
SubjectID                0
VideoID                  0
Attention                0
Mediation                0
Raw                      0
Delta                    0
Theta                    0
Alpha1                   0
Alpha2                   0
Beta1                    0
Beta2                    0
Gamma1                   0
Gamma2                   0
predefinedlabel          0
user-definedlabeln       0
Age                      0
Ethnicity_Bengali        0
Ethnicity_English        0
Ethnicity_Han Chinese    0
Gender_F                 0
Gender_M                 0
dtype: int64

👆 From this we infer that there are no NaN values in our dataset

Converting non numerical values into numerical values => Feature encoding¶

In [75]:
merged_data_before.dtypes
Out[75]:
SubjectID             float64
VideoID               float64
Attention             float64
Mediation             float64
Raw                   float64
Delta                 float64
Theta                 float64
Alpha1                float64
Alpha2                float64
Beta1                 float64
Beta2                 float64
Gamma1                float64
Gamma2                float64
predefinedlabel       float64
user-definedlabeln    float64
Age                     int64
Ethnicity              object
Gender                 object
dtype: object
In [17]:
merged_data_before["Gender"].dtype , merged_data_before["Ethnicity"].dtype
Out[17]:
(dtype('O'), dtype('O'))
In [18]:
# Gender and Ethnicity are non numerical (categorical features)
merged_data = pd.get_dummies(merged_data_before)
merged_data.head()
Out[18]:
SubjectID VideoID Attention Mediation Raw Delta Theta Alpha1 Alpha2 Beta1 ... Gamma1 Gamma2 predefinedlabel user-definedlabeln Age Ethnicity_Bengali Ethnicity_English Ethnicity_Han Chinese Gender_F Gender_M
0 0.0 0.0 56.0 43.0 278.0 301963.0 90612.0 33735.0 23991.0 27946.0 ... 33228.0 8293.0 0.0 0.0 25 0 0 1 0 1
1 0.0 0.0 40.0 35.0 -50.0 73787.0 28083.0 1439.0 2240.0 2746.0 ... 5293.0 2740.0 0.0 0.0 25 0 0 1 0 1
2 0.0 0.0 47.0 48.0 101.0 758353.0 383745.0 201999.0 62107.0 36293.0 ... 57243.0 25354.0 0.0 0.0 25 0 0 1 0 1
3 0.0 0.0 47.0 57.0 -5.0 2012240.0 129350.0 61236.0 17084.0 11488.0 ... 49960.0 33932.0 0.0 0.0 25 0 0 1 0 1
4 0.0 0.0 44.0 53.0 -8.0 1005145.0 354328.0 37102.0 88881.0 45307.0 ... 44790.0 29749.0 0.0 0.0 25 0 0 1 0 1

5 rows × 21 columns

In [19]:
merged_data_save = merged_data.to_csv("./data generated/merged_data_after_dummies.csv",index=False)

Feature Scaling - yet to be completed¶

In [21]:
eeg_data.predefinedlabel.value_counts()
Out[21]:
0.0    6662
1.0    6149
Name: predefinedlabel, dtype: int64
In [22]:
eeg_data["user-definedlabeln"].value_counts()
Out[22]:
1.0    6567
0.0    6244
Name: user-definedlabeln, dtype: int64

After each session, the student rated his/her confusion level on a scale of 1-7, where one corresponded to the least confusing and seven corresponded to the most confusing. These labels if further normalized into labels of whether the students are confused or not. This label is offered as self-labelled confusion in addition to our predefined label of confusion.

Exploratory Data Analysis¶

In [23]:
# Description of non-numerical columns

merged_data.describe()
Out[23]:
SubjectID VideoID Attention Mediation Raw Delta Theta Alpha1 Alpha2 Beta1 ... Gamma1 Gamma2 predefinedlabel user-definedlabeln Age Ethnicity_Bengali Ethnicity_English Ethnicity_Han Chinese Gender_F Gender_M
count 12811.000000 12811.000000 12811.000000 12811.000000 12811.000000 1.281100e+04 1.281100e+04 1.281100e+04 1.281100e+04 1.281100e+04 ... 1.281100e+04 1.281100e+04 12811.000000 12811.000000 12811.000000 12811.000000 12811.000000 12811.000000 12811.000000 12811.000000
mean 4.487394 4.390602 41.313871 47.182656 65.570760 6.057853e+05 1.680526e+05 4.138435e+04 3.318339e+04 2.431837e+04 ... 2.959255e+04 1.441597e+04 0.479978 0.512606 25.409960 0.101085 0.100226 0.798689 0.200999 0.799001
std 2.865373 2.913232 23.152953 22.655976 597.921035 6.376236e+05 2.441346e+05 7.243082e+04 5.831410e+04 3.837968e+04 ... 7.982637e+04 3.603523e+04 0.499618 0.499861 2.205148 0.301453 0.300313 0.400996 0.400763 0.400763
min 0.000000 0.000000 0.000000 0.000000 -2048.000000 4.480000e+02 1.700000e+01 2.000000e+00 2.000000e+00 3.000000e+00 ... 1.000000e+00 2.000000e+00 0.000000 0.000000 24.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 2.000000 2.000000 27.000000 37.000000 -14.000000 9.806400e+04 2.691750e+04 6.838000e+03 6.852000e+03 6.140000e+03 ... 4.058000e+03 2.167500e+03 0.000000 0.000000 24.000000 0.000000 0.000000 1.000000 0.000000 1.000000
50% 4.000000 4.000000 43.000000 51.000000 35.000000 3.954870e+05 8.133100e+04 1.750000e+04 1.495900e+04 1.281800e+04 ... 9.763000e+03 5.116000e+03 0.000000 1.000000 25.000000 0.000000 0.000000 1.000000 0.000000 1.000000
75% 7.000000 7.000000 57.000000 63.000000 90.000000 9.166230e+05 2.052760e+05 4.477950e+04 3.455050e+04 2.740600e+04 ... 2.488800e+04 1.266950e+04 1.000000 1.000000 25.000000 0.000000 0.000000 1.000000 0.000000 1.000000
max 9.000000 9.000000 100.000000 100.000000 2047.000000 3.964663e+06 3.007802e+06 1.369955e+06 1.016913e+06 1.067778e+06 ... 1.972506e+06 1.348117e+06 1.000000 1.000000 31.000000 1.000000 1.000000 1.000000 1.000000 1.000000

8 rows × 21 columns

In [24]:
merged_data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 12811 entries, 0 to 12810
Data columns (total 21 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   SubjectID              12811 non-null  float64
 1   VideoID                12811 non-null  float64
 2   Attention              12811 non-null  float64
 3   Mediation              12811 non-null  float64
 4   Raw                    12811 non-null  float64
 5   Delta                  12811 non-null  float64
 6   Theta                  12811 non-null  float64
 7   Alpha1                 12811 non-null  float64
 8   Alpha2                 12811 non-null  float64
 9   Beta1                  12811 non-null  float64
 10  Beta2                  12811 non-null  float64
 11  Gamma1                 12811 non-null  float64
 12  Gamma2                 12811 non-null  float64
 13  predefinedlabel        12811 non-null  float64
 14  user-definedlabeln     12811 non-null  float64
 15  Age                    12811 non-null  int64  
 16  Ethnicity_Bengali      12811 non-null  uint8  
 17  Ethnicity_English      12811 non-null  uint8  
 18  Ethnicity_Han Chinese  12811 non-null  uint8  
 19  Gender_F               12811 non-null  uint8  
 20  Gender_M               12811 non-null  uint8  
dtypes: float64(15), int64(1), uint8(5)
memory usage: 1.7 MB
In [25]:
attention_greater_0 = merged_data[merged_data["Attention"]  > 0.0]
attention_greater_0.head()
Out[25]:
SubjectID VideoID Attention Mediation Raw Delta Theta Alpha1 Alpha2 Beta1 ... Gamma1 Gamma2 predefinedlabel user-definedlabeln Age Ethnicity_Bengali Ethnicity_English Ethnicity_Han Chinese Gender_F Gender_M
0 0.0 0.0 56.0 43.0 278.0 301963.0 90612.0 33735.0 23991.0 27946.0 ... 33228.0 8293.0 0.0 0.0 25 0 0 1 0 1
1 0.0 0.0 40.0 35.0 -50.0 73787.0 28083.0 1439.0 2240.0 2746.0 ... 5293.0 2740.0 0.0 0.0 25 0 0 1 0 1
2 0.0 0.0 47.0 48.0 101.0 758353.0 383745.0 201999.0 62107.0 36293.0 ... 57243.0 25354.0 0.0 0.0 25 0 0 1 0 1
3 0.0 0.0 47.0 57.0 -5.0 2012240.0 129350.0 61236.0 17084.0 11488.0 ... 49960.0 33932.0 0.0 0.0 25 0 0 1 0 1
4 0.0 0.0 44.0 53.0 -8.0 1005145.0 354328.0 37102.0 88881.0 45307.0 ... 44790.0 29749.0 0.0 0.0 25 0 0 1 0 1

5 rows × 21 columns

In [26]:
attention_greater_0.shape  #rows reduced from 12811 to 11388 after applying "Attention > 0.0"
Out[26]:
(11388, 21)

UNIVARIATE ANALYSIS¶

Univariate analysis using Histogram¶

In [77]:
attention_greater_0.hist(figsize=(15,15));
plt.show();
plt.tight_layout();
plt.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\univariate-analy.png");
<Figure size 640x480 with 0 Axes>

Attention-Meditation.PNG

From the histogram generated for all columms it is clear that,¶
Attention and Meditation follows a Normal distribution since they form a bell shaped curve¶

Univariate analysis using box plot¶

In [28]:
attention_greater_0.plot(kind='box',figsize=(15,15),subplots=True,layout=(5,5))
plt.show()
plt.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\univariate-boxplot.png")
<Figure size 640x480 with 0 Axes>
In [29]:
plt.figure(figsize=(5,5))
labels= ["confused","notconfused"]
merged_data["user-definedlabeln"].value_counts().plot(kind='pie',labels=labels,autopct='%1.1f%%')

plt.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\user-definedLabel.png")
plt.show()
In [30]:
pd.crosstab(merged_data_before.Gender,merged_data_before["user-definedlabeln"])
Out[30]:
user-definedlabeln 0.0 1.0
Gender
F 1297 1278
M 4947 5289
In [31]:
pd.DataFrame(merged_data_before.Gender.value_counts())
Out[31]:
Gender
M 10236
F 2575
In [32]:
pd.crosstab(merged_data_before.Gender,merged_data_before["user-definedlabeln"]).plot(kind="bar")
plt.legend(["Not Confused","Confused"])
plt.ylabel("Count")
plt.title("Distribution over gender");
plt.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\targeDistr-gender.png")

MULTIVARIATE ANALYSIS¶

  • establish a sense of relationship of all variables with one another
In [33]:
pd.plotting.scatter_matrix(merged_data,figsize=(20,20));
plt.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\multivariate-anal.png")
plt.tight_layout();

Correlation matrix¶

In [34]:
import seaborn as sns
In [35]:
plt.figure(figsize=(15,15))
correlation_matx = merged_data.corr()
sns.heatmap(correlation_matx,square=True,cmap="YlGnBu",annot=True,linewidth=0.5,fmt=".2f",cbar=False);
plt.title("Correlation matrix",fontsize=20)
plt.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\correlation-matrix.png")
In [36]:
merged_data.head()
Out[36]:
SubjectID VideoID Attention Mediation Raw Delta Theta Alpha1 Alpha2 Beta1 ... Gamma1 Gamma2 predefinedlabel user-definedlabeln Age Ethnicity_Bengali Ethnicity_English Ethnicity_Han Chinese Gender_F Gender_M
0 0.0 0.0 56.0 43.0 278.0 301963.0 90612.0 33735.0 23991.0 27946.0 ... 33228.0 8293.0 0.0 0.0 25 0 0 1 0 1
1 0.0 0.0 40.0 35.0 -50.0 73787.0 28083.0 1439.0 2240.0 2746.0 ... 5293.0 2740.0 0.0 0.0 25 0 0 1 0 1
2 0.0 0.0 47.0 48.0 101.0 758353.0 383745.0 201999.0 62107.0 36293.0 ... 57243.0 25354.0 0.0 0.0 25 0 0 1 0 1
3 0.0 0.0 47.0 57.0 -5.0 2012240.0 129350.0 61236.0 17084.0 11488.0 ... 49960.0 33932.0 0.0 0.0 25 0 0 1 0 1
4 0.0 0.0 44.0 53.0 -8.0 1005145.0 354328.0 37102.0 88881.0 45307.0 ... 44790.0 29749.0 0.0 0.0 25 0 0 1 0 1

5 rows × 21 columns

In [37]:
merged_data["user-definedlabeln"].value_counts()
Out[37]:
1.0    6567
0.0    6244
Name: user-definedlabeln, dtype: int64
In [38]:
merged_data["predefinedlabel"].value_counts()
Out[38]:
0.0    6662
1.0    6149
Name: predefinedlabel, dtype: int64
In [39]:
merged_data.Age.value_counts()
Out[39]:
24    6394
25    3819
28    1314
31    1284
Name: Age, dtype: int64
In [40]:
merged_data["Attention"].value_counts()
Out[40]:
0.0     1423
43.0     407
41.0     396
38.0     392
48.0     390
        ... 
93.0      23
3.0       17
94.0      17
97.0      14
96.0       8
Name: Attention, Length: 61, dtype: int64
In [41]:
attention_greater_0
Out[41]:
SubjectID VideoID Attention Mediation Raw Delta Theta Alpha1 Alpha2 Beta1 ... Gamma1 Gamma2 predefinedlabel user-definedlabeln Age Ethnicity_Bengali Ethnicity_English Ethnicity_Han Chinese Gender_F Gender_M
0 0.0 0.0 56.0 43.0 278.0 301963.0 90612.0 33735.0 23991.0 27946.0 ... 33228.0 8293.0 0.0 0.0 25 0 0 1 0 1
1 0.0 0.0 40.0 35.0 -50.0 73787.0 28083.0 1439.0 2240.0 2746.0 ... 5293.0 2740.0 0.0 0.0 25 0 0 1 0 1
2 0.0 0.0 47.0 48.0 101.0 758353.0 383745.0 201999.0 62107.0 36293.0 ... 57243.0 25354.0 0.0 0.0 25 0 0 1 0 1
3 0.0 0.0 47.0 57.0 -5.0 2012240.0 129350.0 61236.0 17084.0 11488.0 ... 49960.0 33932.0 0.0 0.0 25 0 0 1 0 1
4 0.0 0.0 44.0 53.0 -8.0 1005145.0 354328.0 37102.0 88881.0 45307.0 ... 44790.0 29749.0 0.0 0.0 25 0 0 1 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
12806 9.0 9.0 64.0 38.0 -39.0 127574.0 9951.0 709.0 21732.0 3872.0 ... 2598.0 960.0 1.0 0.0 24 0 0 1 1 0
12807 9.0 9.0 61.0 35.0 -275.0 323061.0 797464.0 153171.0 145805.0 39829.0 ... 36574.0 10010.0 1.0 0.0 24 0 0 1 1 0
12808 9.0 9.0 60.0 29.0 -426.0 680989.0 154296.0 40068.0 39122.0 10966.0 ... 20427.0 2024.0 1.0 0.0 24 0 0 1 1 0
12809 9.0 9.0 60.0 29.0 -84.0 366269.0 27346.0 11444.0 9932.0 1939.0 ... 12323.0 1764.0 1.0 0.0 24 0 0 1 1 0
12810 9.0 9.0 64.0 29.0 -49.0 1164555.0 1184366.0 50014.0 124208.0 10634.0 ... 22133.0 4482.0 1.0 0.0 24 0 0 1 1 0

11388 rows × 21 columns

In [42]:
fig,(ax1) = plt.subplots()

ax1.scatter(attention_greater_0.Attention[attention_greater_0["user-definedlabeln"]==1],
          attention_greater_0.Alpha1[attention_greater_0["user-definedlabeln"]==1],
          color="orange");
ax1.scatter(attention_greater_0.Attention[attention_greater_0["user-definedlabeln"]==0],
          attention_greater_0.Alpha1[attention_greater_0["user-definedlabeln"]==0],
          color="lightgreen");

ax1.set_xlabel("Attention")
ax1.set_ylabel("Alpha1");
ax1.legend(["Confused","Not confused"])
ax1.set_title("Attention vs Alpha");
# ax2.set_xlabel("Attention-Not Confused")
# ax2.set_ylabel("Alpha1");

# Confusion related to alpha wave (attentive-alpha wave decrease)
# Alpha -> `Passive attention`
fig.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\attention-alpha.png")
In [43]:
fig,(ax1) = plt.subplots()

ax1.scatter(attention_greater_0.Attention[attention_greater_0["user-definedlabeln"]==1],
          attention_greater_0.Beta1[attention_greater_0["user-definedlabeln"]==1],
          color="orange");
ax1.scatter(attention_greater_0.Attention[attention_greater_0["user-definedlabeln"]==0],
          attention_greater_0.Beta1[attention_greater_0["user-definedlabeln"]==0],
          color="lightgreen");

ax1.set_xlabel("Attention")
ax1.set_ylabel("Beta1");
ax1.legend(["Confused","Not confused"])
ax1.set_title("Attention vs Beta");
# ax2.set_xlabel("Attention-Not Confused")
# ax2.set_ylabel("Alpha1");

# Confusion related to Beta1 wave 
# Beta - external attention, busy
fig.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\attention-beta.png")
In [44]:
fig,(ax1) = plt.subplots()

ax1.scatter(attention_greater_0.Attention[attention_greater_0["user-definedlabeln"]==1],
          attention_greater_0.Delta[attention_greater_0["user-definedlabeln"]==1],
          color="orange");
ax1.scatter(attention_greater_0.Attention[attention_greater_0["user-definedlabeln"]==0],
          attention_greater_0.Delta[attention_greater_0["user-definedlabeln"]==0],
          color="lightgreen");

ax1.set_xlabel("Attention")
ax1.set_ylabel("Delta");
ax1.legend(["Confused","Not confused"])
ax1.set_title("Attention vs Delta");
# ax2.set_xlabel("Attention-Not Confused")
# ax2.set_ylabel("Alpha1");

# Confusion related to delta wave
# Delta - sleep,dreaming
fig.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\attention-delta.png")
In [45]:
from matplotlib.colors import ListedColormap
import seaborn as sns

sns.color_palette()
Out[45]:
In [46]:
orlg = ['#FFA500','#90EE90']
sns.palplot(sns.color_palette(orlg))
In [47]:
pkb = ['#FFB6C1','#0000FF']
sns.palplot(sns.color_palette(pkb))
In [48]:
orlgr = ListedColormap(sns.color_palette(orlg))
In [49]:
lpbl = ListedColormap(sns.color_palette(pkb))
In [50]:
fig,((ax1,ax2),(ax3,ax4),(ax5,ax6)) = plt.subplots(nrows=3,ncols=2,figsize=(10,10))

plt1 = ax1.scatter(x = attention_greater_0.Attention,
                   y = attention_greater_0.Alpha1,
                   c = attention_greater_0["user-definedlabeln"],
                   cmap=orlgr       
               )

ax1.legend(*plt1.legend_elements(),title="label");
ax1.set(xlabel="Attention")
ax1.set(ylabel="Alpha1")
ax1.set_title("Attention vs Alpha",fontdict={"fontweight":"bold"})


plt2 = ax2.scatter(x = attention_greater_0.Attention,
                   y = attention_greater_0.Beta1,
                   c = attention_greater_0["user-definedlabeln"],
                   cmap=orlgr)

ax2.legend(*plt2.legend_elements(),title="label");
ax2.set(xlabel="Attention")
ax2.set(ylabel="Beta1")
ax2.set_title("Attention vs Beta",fontdict={"fontweight":"bold"})

plt3 = ax3.scatter(x = attention_greater_0.Attention,
                   y = attention_greater_0.Delta,
                   c = attention_greater_0["user-definedlabeln"],
                   cmap=orlgr)

ax3.legend(*plt3.legend_elements(),title="label");
ax3.set(xlabel="Attention")
ax3.set(ylabel="Delta")
ax3.set_title("Attention vs Delta",fontdict={"fontweight":"bold"})

plt4 = ax4.scatter(x = attention_greater_0.Attention,
                   y = attention_greater_0.Theta,
                   c = attention_greater_0["user-definedlabeln"],
                   cmap=orlgr)

ax4.legend(*plt4.legend_elements(),title="label");
ax4.set(xlabel="Attention")
ax4.set(ylabel="Theta")
ax4.set_title("Attention vs Theta",fontdict={"fontweight":"bold"})

plt5 = ax5.scatter(x = attention_greater_0.Attention,
                   y = attention_greater_0.Gamma1,
                   c = attention_greater_0["user-definedlabeln"],
                   cmap=orlgr)
ax5.legend(*plt5.legend_elements(),title="label");
ax5.set(xlabel="Attention")
ax5.set(ylabel="Gamma1")
ax5.set_title("Attention vs Gamma",fontdict={"fontweight":"bold"})

fig.suptitle("Attention vs waves")
fig.tight_layout()
fig.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\attention-vs-waves.png")
In [51]:
meditation_greater_0 = merged_data[merged_data["Mediation"]>0.0]
meditation_greater_0.head()
Out[51]:
SubjectID VideoID Attention Mediation Raw Delta Theta Alpha1 Alpha2 Beta1 ... Gamma1 Gamma2 predefinedlabel user-definedlabeln Age Ethnicity_Bengali Ethnicity_English Ethnicity_Han Chinese Gender_F Gender_M
0 0.0 0.0 56.0 43.0 278.0 301963.0 90612.0 33735.0 23991.0 27946.0 ... 33228.0 8293.0 0.0 0.0 25 0 0 1 0 1
1 0.0 0.0 40.0 35.0 -50.0 73787.0 28083.0 1439.0 2240.0 2746.0 ... 5293.0 2740.0 0.0 0.0 25 0 0 1 0 1
2 0.0 0.0 47.0 48.0 101.0 758353.0 383745.0 201999.0 62107.0 36293.0 ... 57243.0 25354.0 0.0 0.0 25 0 0 1 0 1
3 0.0 0.0 47.0 57.0 -5.0 2012240.0 129350.0 61236.0 17084.0 11488.0 ... 49960.0 33932.0 0.0 0.0 25 0 0 1 0 1
4 0.0 0.0 44.0 53.0 -8.0 1005145.0 354328.0 37102.0 88881.0 45307.0 ... 44790.0 29749.0 0.0 0.0 25 0 0 1 0 1

5 rows × 21 columns

In [52]:
meditation_greater_0[meditation_greater_0["Mediation"]==0.0]
Out[52]:
SubjectID VideoID Attention Mediation Raw Delta Theta Alpha1 Alpha2 Beta1 ... Gamma1 Gamma2 predefinedlabel user-definedlabeln Age Ethnicity_Bengali Ethnicity_English Ethnicity_Han Chinese Gender_F Gender_M

0 rows × 21 columns

In [53]:
fig,(ax1) = plt.subplots()

ax1.scatter(meditation_greater_0.Mediation[meditation_greater_0["user-definedlabeln"]==1],
          meditation_greater_0.Alpha1[meditation_greater_0["user-definedlabeln"]==1],
          color="orange");
ax1.scatter(meditation_greater_0.Mediation[meditation_greater_0["user-definedlabeln"]==0],
          meditation_greater_0.Alpha1[meditation_greater_0["user-definedlabeln"]==0],
          color="lightgreen");

ax1.set_title("Meditation vs Alpha")
ax1.set_xlabel("Meditation")
ax1.set_ylabel("Alpha");
ax1.legend(["Confused","Not confused"]);
ax1.set_xlim((0,110));
# ax2.set_xlabel("Attention-Not Confused")
# ax2.set_ylabel("Alpha1");

# Alpha distribution in meditation
fig.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\meditation-alpha.png")
In [54]:
fig,((ax1,ax2),(ax3,ax4),(ax5,ax6)) = plt.subplots(nrows=3,ncols=2,figsize=(10,10))

plt1 = ax1.scatter(x = meditation_greater_0.Mediation,
                   y = meditation_greater_0.Alpha1,
                   c = meditation_greater_0["user-definedlabeln"],
                   cmap=lpbl       
               )

ax1.legend(*plt1.legend_elements(),title="label");
ax1.set(xlabel="Meditation")
ax1.set(ylabel="Alpha1")
ax1.set_title("Meditation vs Alpha",fontdict={"fontweight":"bold"})


plt2 = ax2.scatter(x = meditation_greater_0.Mediation,
                   y = meditation_greater_0.Beta1,
                   c = meditation_greater_0["user-definedlabeln"],
                   cmap=lpbl )

ax2.legend(*plt2.legend_elements(),title="label");
ax2.set(xlabel="Meditation")
ax2.set(ylabel="Beta1")
ax2.set_title("Meditation vs Beta",fontdict={"fontweight":"bold"})

plt3 = ax3.scatter(x = meditation_greater_0.Mediation,
                   y = meditation_greater_0.Delta,
                   c = meditation_greater_0["user-definedlabeln"],
                   cmap=lpbl )

ax3.legend(*plt3.legend_elements(),title="label");
ax3.set(xlabel="Meditation")
ax3.set(ylabel="Delta")
ax3.set_title("Meditation vs Delta",fontdict={"fontweight":"bold"})

plt4 = ax4.scatter(x = meditation_greater_0.Mediation,
                   y = meditation_greater_0.Theta,
                   c = meditation_greater_0["user-definedlabeln"],
                   cmap=lpbl)

ax4.legend(*plt4.legend_elements(),title="label");
ax4.set(xlabel="Meditation")
ax4.set(ylabel="Theta")
ax4.set_title("Meditation vs Theta",fontdict={"fontweight":"bold"})

plt5 = ax5.scatter(x = meditation_greater_0.Mediation,
                   y = meditation_greater_0.Gamma1,
                   c = meditation_greater_0["user-definedlabeln"],
                   cmap=lpbl)
ax5.legend(*plt5.legend_elements(),title="label");
ax5.set(xlabel="Meditation")
ax5.set(ylabel="Gamma1")
ax5.set_title("Meditation vs Gamma",fontdict={"fontweight":"bold"})

fig.suptitle("Meditation vs waves")
fig.tight_layout()
fig.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\meditation-vs-waves.png")

Attention vs userdefined label¶

In [55]:
pd.crosstab(attention_greater_0["user-definedlabeln"],attention_greater_0["Attention"])
Out[55]:
Attention 1.0 3.0 4.0 7.0 8.0 10.0 11.0 13.0 14.0 16.0 ... 84.0 87.0 88.0 90.0 91.0 93.0 94.0 96.0 97.0 100.0
user-definedlabeln
0.0 13 4 9 13 16 18 17 28 29 32 ... 36 38 30 27 20 9 10 5 11 19
1.0 75 13 33 16 38 44 57 56 90 79 ... 20 9 10 7 16 14 7 3 3 23

2 rows × 60 columns

In [56]:
pd.crosstab(attention_greater_0["Attention"],attention_greater_0["user-definedlabeln"]).plot(kind="bar",figsize=(15,15),color=["lightgreen","orange"])
plt.legend(["Not Confused","Confused"]);
plt.title("Attention vs Userdefined label",fontweight="bold");
plt.xlabel("Attention score")
plt.ylabel("Count");
plt.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\attention-vs-label.png")

Meditation vs userdefined label¶

In [57]:
pd.crosstab(meditation_greater_0["user-definedlabeln"],meditation_greater_0["Mediation"])
Out[57]:
Mediation 1.0 3.0 4.0 7.0 8.0 10.0 11.0 13.0 14.0 16.0 ... 84.0 87.0 88.0 90.0 91.0 93.0 94.0 96.0 97.0 100.0
user-definedlabeln
0.0 0 2 3 1 5 3 2 5 24 9 ... 43 28 23 15 20 10 13 4 7 15
1.0 22 4 11 12 11 15 19 20 23 37 ... 35 32 21 24 5 9 6 4 2 8

2 rows × 60 columns

In [58]:
pd.crosstab(meditation_greater_0["Mediation"],meditation_greater_0["user-definedlabeln"]).plot(kind="bar",
                                                                                              figsize=(15,15),
                                                                                              );
plt.legend(["Not confused","Confused"]);
plt.title("Meditation vs user-defined label",fontweight="bold")
plt.xlabel("Meditation score")
plt.ylabel("Count");
plt.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\meditation-vs-label.png")

Positive correlated waves¶

In [59]:
fig,((ax1,ax2),(ax3,ax4)) = plt.subplots(nrows=2,ncols=2,figsize=(10,6))
plot1 = ax1.scatter(merged_data["Beta2"],merged_data["Gamma1"],c=merged_data["user-definedlabeln"],cmap="autumn");
plot2 = ax2.scatter(merged_data["Beta2"],merged_data["Gamma2"],c=merged_data["user-definedlabeln"],cmap="autumn");
plot3 = ax3.scatter(merged_data["Gamma1"],merged_data["Gamma2"],c=merged_data["user-definedlabeln"],cmap="autumn");
plot4 = ax4.scatter(merged_data["Alpha2"],merged_data["Beta1"],c=merged_data["user-definedlabeln"],cmap="autumn");

ax1.legend(*plot1.legend_elements(),title="label");
ax1.set_title("Beta2 Vs Gamma1 (Corr:0.81)",fontdict={"fontweight":"bold"})
ax1.set(xlabel="Beta2",ylabel="Gamma1")

ax2.legend(*plot2.legend_elements(),title="label");
ax2.set_title("Beta2 Vs Gamma2 (Corr:0.69)",fontdict={"fontweight":"bold"})
ax2.set(xlabel="Beta2",ylabel="Gamma2");

ax3.legend(*plot3.legend_elements(),title="label");
ax3.set_title("Gamma1 Vs Gamma2 (Corr:0.74)",fontdict={"fontweight":"bold"})
ax3.set(xlabel="Gamma1",ylabel="Gamma2")

ax4.legend(*plot4.legend_elements(),title="label");
ax4.set_title("Alpha2 Vs Beta1 (Corr:0.63)",fontdict={"fontweight":"bold"})
ax4.set(xlabel="Alpha2",ylabel="Beta1")

fig.suptitle("+ve Corr",fontdict={"weight":"bold"})
fig.tight_layout()
fig.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\positive-corr.png")

Negative correlated waves¶

In [60]:
fig,(ax1,ax2) = plt.subplots(nrows=1,ncols=2,figsize=(10,6))

plot1 = ax1.scatter(merged_data["Mediation"],merged_data["Beta2"],
                   c=merged_data["user-definedlabeln"],
                   cmap="bwr")

plot2 = ax2.scatter(merged_data["Mediation"],merged_data["Gamma2"],
                  c=merged_data["user-definedlabeln"],
                  cmap="bwr")

ax1.legend(*plot1.legend_elements(),title="label")
ax1.set_title("Meditation vs Beta2 (Corr:-0.39)",fontdict={"fontweight":"bold"})
ax1.set_xlabel("Meditation")
ax1.set_ylabel("Beta2")

ax2.legend(*plot2.legend_elements(),title="label")
ax2.set_title("Meditation vs Gamma2 (Corr:-0.37)",fontdict={"fontweight":"bold"})
ax2.set_xlabel("Meditation")
ax2.set_ylabel("Gamma2")

fig.suptitle("-ve Corr")
fig.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\negative-corr.png")
fig.tight_layout()

Summary¶

Waves distribution

  • Delta – sleep,dreaming
  • Alpha – very relaxed , passive attention, reflective
  • Beta – Anxiety dominant, active, external attention , Busy
  • Theta – Deeply relaxed, inward focused , drowsiness
  • Gamma – Concentration, Problem solving
  • Attention and Meditation follows a Normal distribution since they form a bell shaped curve
  • Our data is consistent, Equal distribution of Confused(51.3%) and Not confused(48.7%) data points
  • Compared to Females, Males have high chance of being confused
  • From Univariate Analysis, Attention and Meditation column values are found to be normally distributed (fom this we can infer that we have uniform distribution of values ranging from 0 to 100)

From Correlation marix, we were able to find positive and negative correlated columns¶

  • Positive Correlated columns

Beta2,Gamma1 (Corr: +0.81)

Gamma1,Gamma2 (Corr: +0.74)

Beta2,Gamma2 (Corr: +0.69)

Alpha1,Beta1 (Corr: +0.63)

  • Negatively correlated columns

Meditation,Beta2 (Corr: -0.39)

Meditation,Gamma2 (Corr: -0.37)

From Attention vs user defined label¶

After attention value 51, we could see a majority count of students are NOT Confused Higher the attention value higher the chance of a person being NOT Confused

From Meditation vs user defined label¶

Lesser the value of Mediation, higher the chance of student being Confused

Random 100 data points plotting¶

In [61]:
att_medit_grt0 = merged_data[(merged_data["Attention"]>0.0)&(merged_data["Mediation"]>0.0)]
att_medit_grt0[(att_medit_grt0["Attention"]==0.0)|(att_medit_grt0["Mediation"]==0.0)]
Out[61]:
SubjectID VideoID Attention Mediation Raw Delta Theta Alpha1 Alpha2 Beta1 ... Gamma1 Gamma2 predefinedlabel user-definedlabeln Age Ethnicity_Bengali Ethnicity_English Ethnicity_Han Chinese Gender_F Gender_M

0 rows × 21 columns

In [62]:
#random_100 = att_medit_grt0.sample(n=100)
In [63]:
random_100 = pd.read_csv("./data generated/Random-100-samples.csv")
In [64]:
Att_random_best100 = random_100
Att_random_best100.to_csv(r"C:\Users\HP\Notebook files\IP\data generated\Random-100-samples.csv",index=False)
In [65]:
random_100.shape
Out[65]:
(100, 21)
In [66]:
plt.figure(figsize=(15,15))
corr_mat = random_100.corr()
sns.heatmap(corr_mat,square=True,annot=True,fmt='.2f',cbar=False,linewidths=0.5,cmap="YlGnBu");
plt.title("With 100 samples",pad=20.0)
plt.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\100-samples-cmtx.png")

Attention vs waves¶

In [67]:
fig,((ax1,ax2),(ax3,ax4),(ax5,ax6)) = plt.subplots(nrows=3,ncols=2,figsize=(10,10))

plot1 = ax1.scatter(x=random_100.Attention,
                    y=random_100.Alpha1,
                    c=random_100["user-definedlabeln"],
                    cmap=orlgr);

ax1.hlines(random_100.Alpha1.mean(),0,100,linestyles="dotted",color="red")
ax1.set_xlabel("Attention")
ax1.set_ylabel("Alpha1")
ax1.set_title("Attention vs Alpha")
ax1.legend(*plot1.legend_elements(),title="1-C,0-NC");

plot2 = ax2.scatter(x=random_100.Attention,
                    y=random_100.Beta1,
                    c=random_100["user-definedlabeln"],
                    cmap=orlgr)

ax2.hlines(random_100.Beta1.mean(),0,100,linestyles="dotted",color="red")
ax2.set_xlabel("Attention")
ax2.set_ylabel("Beta1")
ax2.set_title("Attention vs Beta")
ax2.legend(*plot2.legend_elements(),title="1-C,0-NC")

plot3 = ax3.scatter(x=random_100.Attention,
                    y=random_100.Theta,
                    c=random_100["user-definedlabeln"],
                    cmap=orlgr)

ax3.hlines(random_100.Theta.mean(),0,100,linestyles="dotted",color="red")
ax3.set_xlabel("Attention")
ax3.set_ylabel("Theta")
ax3.set_title("Attention vs Theta")
ax3.legend(*plot3.legend_elements(),title="1-C,0-NC")


plot4 = ax4.scatter(x=random_100.Attention,
                    y=random_100.Gamma1,
                    c=random_100["user-definedlabeln"],
                    cmap=orlgr)

ax4.hlines(random_100.Gamma1.mean(),0,100,linestyles="dotted",color="red")
ax4.set_xlabel("Attention")
ax4.set_ylabel("Gamma1")
ax4.set_title("Attention vs Gamma")
ax4.legend(*plot4.legend_elements(),title="1-C,0-NC");

plot5 = ax5.scatter(x=random_100.Attention,
                    y=random_100.Delta,
                    c=random_100["user-definedlabeln"],
                    cmap=orlgr)

ax5.hlines(random_100.Delta.mean(),0,100,linestyles="dotted",color="red")
ax5.set_xlabel("Attention")
ax5.set_ylabel("Delta")
ax5.set_title("Attention vs Delta")
ax5.legend(*plot5.legend_elements(),title="1-C,0-NC");

fig.suptitle("Attention vs Waves with 100 random data samples")
fig.tight_layout()
fig.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\attVSwaves-100Samples.png")
In [68]:
fig,((ax1,ax2),(ax3,ax4),(ax5,ax6)) = plt.subplots(nrows=3,ncols=2,figsize=(10,10))

plot1 = ax1.scatter(x=random_100.Mediation,
                    y=random_100.Alpha1,
                    c=random_100["user-definedlabeln"],
                    cmap=orlgr);

ax1.hlines(random_100.Alpha1.mean(),0,100,linestyles="dotted",color="red")
ax1.set_xlabel("Meditation")
ax1.set_ylabel("Alpha1")
ax1.set_title("Meditation vs Alpha")
ax1.legend(*plot1.legend_elements(),title="1-C,0-NC");

plot2 = ax2.scatter(x=random_100.Mediation,
                    y=random_100.Beta1,
                    c=random_100["user-definedlabeln"],
                    cmap=orlgr)

ax2.hlines(random_100.Beta1.mean(),0,100,linestyles="dotted",color="red")
ax2.set_xlabel("Meditation")
ax2.set_ylabel("Beta1")
ax2.set_title("Meditation vs Beta")
ax2.legend(*plot2.legend_elements(),title="1-C,0-NC")

plot3 = ax3.scatter(x=random_100.Mediation,
                    y=random_100.Theta,
                    c=random_100["user-definedlabeln"],
                    cmap=orlgr)

ax3.hlines(random_100.Theta.mean(),0,100,linestyles="dotted",color="red")
ax3.set_xlabel("Meditation")
ax3.set_ylabel("Theta")
ax3.set_title("Meditation vs Theta")
ax3.legend(*plot3.legend_elements(),title="1-C,0-NC")


plot4 = ax4.scatter(x=random_100.Mediation,
                    y=random_100.Gamma1,
                    c=random_100["user-definedlabeln"],
                    cmap=orlgr)

ax4.hlines(random_100.Gamma1.mean(),0,100,linestyles="dotted",color="red")
ax4.set_xlabel("Meditation")
ax4.set_ylabel("Gamma1")
ax4.set_title("Meditation vs Gamma")
ax4.legend(*plot4.legend_elements(),title="1-C,0-NC");

plot5 = ax5.scatter(x=random_100.Mediation,
                    y=random_100.Delta,
                    c=random_100["user-definedlabeln"],
                    cmap=orlgr)

ax5.hlines(random_100.Delta.mean(),0,100,linestyles="dotted",color="red")
ax5.set_xlabel("Meditation")
ax5.set_ylabel("Delta")
ax5.set_title("Meditation vs Delta")
ax5.legend(*plot5.legend_elements(),title="1-C,0-NC");

fig.suptitle("Meditation vs Waves with 100 random data samples")
fig.tight_layout()
fig.savefig(r"C:\Users\HP\Notebook files\IP\plt generated\MedVSwaves-100Samples.png")
In [69]:
## Calculate mean for Attention
attention_greater_0.Attention[attention_greater_0["Attention"]>51.0].mean()
Out[69]:
65.74642362713429
In [70]:
attention_greater_0.Attention[attention_greater_0["Attention"]<51.0].mean()
Out[70]:
33.69200779727095
In [71]:
## Calculate mean for Meditation
meditation_greater_0.Mediation[meditation_greater_0["Mediation"]>56.0].mean()
Out[71]:
68.25719810915342
In [72]:
meditation_greater_0.Mediation[meditation_greater_0["Mediation"]<56.0].mean()
Out[72]:
41.51235162014758
In [73]:
!set PATH=/Library/TeX/texbin:$PATH